Integration¶

Sanorama Hie et al., 2019
GitHub
Tutorial external API
External external API tutorial

A fix to run scran pooling normalization computeSumFactors in current python environment.

In [1]:
import scanpy as sc
import scanorama

import numpy as np
import pandas as pd

import os
In [2]:
# Working directory 
os.chdir('/research/peer/fdeckert/FD20200109SPLENO')
In [3]:
# rpy2 
os.environ['R_HOME'] = '/home/fdeckert/bin/miniconda3/envs/p.3.8.12-FD20200109SPLENO/lib/R'
In [4]:
# Plotting 
import rpy2.robjects as robjects
color_load = robjects.r.source('plotting_global.R')
color = dict()
for i in range(len(color_load[0])):
    color[color_load[0].names[i]] = {key : color_load[0][i].rx2(key)[0] for key in color_load[0][i].names}

sc.set_figure_params(figsize=(5, 5))

Parameter¶

In [5]:
# Scanorama 
dimred=100
knn=20

# Scanpy 
n_neighbors=50

Scanorama¶

In [6]:
adata = sc.read_h5ad('data/object/so_sct.h5ad')
adata = adata.raw.to_adata()
In [7]:
def set_color(categories): 
    
    categories = [x for x in categories if x in list(adata.obs.columns)]

    for category in categories: 
        
        adata.obs[category] = pd.Series(adata.obs[category], dtype='category')
        
        keys = list(color[category].keys())
        keys = [x for x in keys if x in list(adata.obs[category])]

        adata.obs[category] = adata.obs[category].cat.reorder_categories(keys)
        adata.uns[category+'_colors'] = np.array([color[category].get(key) for key in keys], dtype=object)
        
# Set colors
set_color(list(color.keys()))
In [8]:
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
In [9]:
adata.obs['integrate'] = adata.obs['treatment'].astype(str)+adata.obs['sample_rep'].astype(str)
adata_sub = dict()
for sample_group in adata.obs['integrate'].unique():
    adata_tmp = adata[adata.obs['integrate']==sample_group].copy()
    sc.pp.scale(adata_tmp)
    adata_sub[sample_group] = adata_tmp
adata_sub = list(adata_sub.values())
In [10]:
# Run Scanorama
scanorama.integrate_scanpy(adata_sub, dimred=dimred, knn=knn, verbose=True)

# Concatenate scanorama output 
X_scanorama = [ad.obsm['X_scanorama'] for ad in adata_sub]
X_scanorama = np.concatenate(X_scanorama)

obs_names = [ad.obs_names for ad in adata_sub]
obs_names = np.concatenate(obs_names)
all(obs_names==adata.obs_names)

# Add X_scanorama integration to adata 
adata.obsm["X_scanorama"] = X_scanorama
Found 14772 genes among all datasets
Processing datasets (0, 1)
Processing datasets (2, 3)
Processing datasets (1, 3)
Processing datasets (0, 2)
Processing datasets (0, 3)
Processing datasets (1, 2)
In [11]:
# # Dimensional reduction and clustering 
sc.pp.neighbors(adata, n_neighbors=n_neighbors, n_pcs=dimred, use_rep='X_scanorama')
sc.tl.leiden(adata, resolution=1)
sc.tl.louvain(adata, resolution=1)
sc.tl.umap(adata)

# Plot 
sc.pl.umap(adata, color=['louvain', 'leiden', 'tissue', 'treatment', 'label_fine_haemosphere', 'sample_rep', 'cc_phase_class', 'pHb_RNA', 'pRb_RNA'], wspace=0.5, ncols=3)

Scanorama (HVG 8000)¶

In [12]:
adata = sc.read_h5ad('data/object/so_sct.h5ad')
hvg_8000 = list(adata.uns['hvg_int_8000'])
adata = adata.raw.to_adata()
In [13]:
def set_color(categories): 
    
    categories = [x for x in categories if x in list(adata.obs.columns)]

    for category in categories: 
        
        adata.obs[category] = pd.Series(adata.obs[category], dtype='category')
        
        keys = list(color[category].keys())
        keys = [x for x in keys if x in list(adata.obs[category])]

        adata.obs[category] = adata.obs[category].cat.reorder_categories(keys)
        adata.uns[category+'_colors'] = np.array([color[category].get(key) for key in keys], dtype=object)
        
# Set colors
set_color(list(color.keys()))
In [14]:
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
adata = adata[:,hvg_8000]
In [15]:
adata.obs['integrate'] = adata.obs['treatment'].astype(str)+adata.obs['sample_rep'].astype(str)
adata_sub = dict()
for sample_group in adata.obs['integrate'].unique():
    adata_tmp = adata[adata.obs['integrate']==sample_group].copy()
    sc.pp.scale(adata_tmp)
    adata_sub[sample_group] = adata_tmp
adata_sub = list(adata_sub.values())
/tmp/ipykernel_1211665/3130535487.py:1: ImplicitModificationWarning: Trying to modify attribute `.obs` of view, initializing view as actual.
  adata.obs['integrate'] = adata.obs['treatment'].astype(str)+adata.obs['sample_rep'].astype(str)
In [16]:
# Run Scanorama
scanorama.integrate_scanpy(adata_sub, dimred=dimred, knn=knn, verbose=True)

# Concatenate scanorama output 
X_scanorama = [ad.obsm['X_scanorama'] for ad in adata_sub]
X_scanorama = np.concatenate(X_scanorama)

obs_names = [ad.obs_names for ad in adata_sub]
obs_names = np.concatenate(obs_names)
all(obs_names==adata.obs_names)

# Add X_scanorama integration to adata 
adata.obsm["X_scanorama"] = X_scanorama
Found 8000 genes among all datasets
Processing datasets (2, 3)
Processing datasets (1, 3)
Processing datasets (0, 1)
Processing datasets (0, 2)
Processing datasets (0, 3)
Processing datasets (1, 2)
In [17]:
# # Dimensional reduction and clustering 
sc.pp.neighbors(adata, n_neighbors=n_neighbors, n_pcs=dimred, use_rep='X_scanorama')
sc.tl.leiden(adata, resolution=1)
sc.tl.louvain(adata, resolution=1)
sc.tl.umap(adata)

# Plot 
sc.pl.umap(adata, color=['louvain', 'leiden', 'tissue', 'treatment', 'label_fine_haemosphere', 'sample_rep', 'cc_phase_class', 'pHb_RNA', 'pRb_RNA'], wspace=0.5, ncols=3)

Scanorama (HVG 6000)¶

In [18]:
adata = sc.read_h5ad('data/object/so_sct.h5ad')
hvg_6000 = list(adata.uns['hvg_int_6000'])
adata = adata.raw.to_adata()
In [19]:
def set_color(categories): 
    
    categories = [x for x in categories if x in list(adata.obs.columns)]

    for category in categories: 
        
        adata.obs[category] = pd.Series(adata.obs[category], dtype='category')
        
        keys = list(color[category].keys())
        keys = [x for x in keys if x in list(adata.obs[category])]

        adata.obs[category] = adata.obs[category].cat.reorder_categories(keys)
        adata.uns[category+'_colors'] = np.array([color[category].get(key) for key in keys], dtype=object)
        
# Set colors
set_color(list(color.keys()))
In [20]:
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
adata = adata[:,hvg_6000]
In [21]:
adata.obs['integrate'] = adata.obs['treatment'].astype(str)+adata.obs['sample_rep'].astype(str)
adata_sub = dict()
for sample_group in adata.obs['integrate'].unique():
    adata_tmp = adata[adata.obs['integrate']==sample_group].copy()
    sc.pp.scale(adata_tmp)
    adata_sub[sample_group] = adata_tmp
adata_sub = list(adata_sub.values())
/tmp/ipykernel_1211665/3130535487.py:1: ImplicitModificationWarning: Trying to modify attribute `.obs` of view, initializing view as actual.
  adata.obs['integrate'] = adata.obs['treatment'].astype(str)+adata.obs['sample_rep'].astype(str)
In [22]:
# Run Scanorama
scanorama.integrate_scanpy(adata_sub, dimred=dimred, knn=knn, verbose=True)

# Concatenate scanorama output 
X_scanorama = [ad.obsm['X_scanorama'] for ad in adata_sub]
X_scanorama = np.concatenate(X_scanorama)

obs_names = [ad.obs_names for ad in adata_sub]
obs_names = np.concatenate(obs_names)
all(obs_names==adata.obs_names)

# Add X_scanorama integration to adata 
adata.obsm["X_scanorama"] = X_scanorama
Found 6000 genes among all datasets
Processing datasets (2, 3)
Processing datasets (1, 3)
Processing datasets (0, 1)
Processing datasets (0, 2)
Processing datasets (0, 3)
Processing datasets (1, 2)
In [23]:
# # Dimensional reduction and clustering 
sc.pp.neighbors(adata, n_neighbors=n_neighbors, n_pcs=dimred, use_rep='X_scanorama')
sc.tl.leiden(adata, resolution=1)
sc.tl.louvain(adata, resolution=1)
sc.tl.umap(adata)

# Plot 
sc.pl.umap(adata, color=['louvain', 'leiden', 'tissue', 'treatment', 'label_fine_haemosphere', 'sample_rep', 'cc_phase_class', 'pHb_RNA', 'pRb_RNA'], wspace=0.5, ncols=3)

Scanorama (HVG 4000)¶

In [24]:
adata = sc.read_h5ad('data/object/so_sct.h5ad')
hvg_4000 = list(adata.uns['hvg_int_4000'])
adata = adata.raw.to_adata()
In [25]:
def set_color(categories): 
    
    categories = [x for x in categories if x in list(adata.obs.columns)]

    for category in categories: 
        
        adata.obs[category] = pd.Series(adata.obs[category], dtype='category')
        
        keys = list(color[category].keys())
        keys = [x for x in keys if x in list(adata.obs[category])]

        adata.obs[category] = adata.obs[category].cat.reorder_categories(keys)
        adata.uns[category+'_colors'] = np.array([color[category].get(key) for key in keys], dtype=object)
        
# Set colors
set_color(list(color.keys()))
In [26]:
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
adata = adata[:,hvg_4000]
In [27]:
adata.obs['integrate'] = adata.obs['treatment'].astype(str)+adata.obs['sample_rep'].astype(str)
adata_sub = dict()
for sample_group in adata.obs['integrate'].unique():
    adata_tmp = adata[adata.obs['integrate']==sample_group].copy()
    sc.pp.scale(adata_tmp)
    adata_sub[sample_group] = adata_tmp
adata_sub = list(adata_sub.values())
/tmp/ipykernel_1211665/3130535487.py:1: ImplicitModificationWarning: Trying to modify attribute `.obs` of view, initializing view as actual.
  adata.obs['integrate'] = adata.obs['treatment'].astype(str)+adata.obs['sample_rep'].astype(str)
In [28]:
# Run Scanorama
scanorama.integrate_scanpy(adata_sub, dimred=dimred, knn=knn, verbose=True)

# Concatenate scanorama output 
X_scanorama = [ad.obsm['X_scanorama'] for ad in adata_sub]
X_scanorama = np.concatenate(X_scanorama)

obs_names = [ad.obs_names for ad in adata_sub]
obs_names = np.concatenate(obs_names)
all(obs_names==adata.obs_names)

# Add X_scanorama integration to adata 
adata.obsm["X_scanorama"] = X_scanorama
Found 4000 genes among all datasets
Processing datasets (2, 3)
Processing datasets (1, 3)
Processing datasets (0, 1)
Processing datasets (0, 2)
Processing datasets (0, 3)
Processing datasets (1, 2)
In [29]:
# # Dimensional reduction and clustering 
sc.pp.neighbors(adata, n_neighbors=n_neighbors, n_pcs=dimred, use_rep='X_scanorama')
sc.tl.leiden(adata, resolution=1)
sc.tl.louvain(adata, resolution=1)
sc.tl.umap(adata)

# Plot 
sc.pl.umap(adata, color=['louvain', 'leiden', 'tissue', 'treatment', 'label_fine_haemosphere', 'sample_rep', 'cc_phase_class', 'pHb_RNA', 'pRb_RNA'], wspace=0.5, ncols=3)

Scanorama (HVG 2000)¶

In [30]:
adata = sc.read_h5ad('data/object/so_sct.h5ad')
hvg_2000 = list(adata.uns['hvg_int_2000'])
adata = adata.raw.to_adata()
In [31]:
def set_color(categories): 
    
    categories = [x for x in categories if x in list(adata.obs.columns)]

    for category in categories: 
        
        adata.obs[category] = pd.Series(adata.obs[category], dtype='category')
        
        keys = list(color[category].keys())
        keys = [x for x in keys if x in list(adata.obs[category])]

        adata.obs[category] = adata.obs[category].cat.reorder_categories(keys)
        adata.uns[category+'_colors'] = np.array([color[category].get(key) for key in keys], dtype=object)
        
# Set colors
set_color(list(color.keys()))
In [32]:
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
adata = adata[:,hvg_2000]
In [33]:
adata.obs['integrate'] = adata.obs['treatment'].astype(str)+adata.obs['sample_rep'].astype(str)
adata_sub = dict()
for sample_group in adata.obs['integrate'].unique():
    adata_tmp = adata[adata.obs['integrate']==sample_group].copy()
    sc.pp.scale(adata_tmp)
    adata_sub[sample_group] = adata_tmp
adata_sub = list(adata_sub.values())
/tmp/ipykernel_1211665/3130535487.py:1: ImplicitModificationWarning: Trying to modify attribute `.obs` of view, initializing view as actual.
  adata.obs['integrate'] = adata.obs['treatment'].astype(str)+adata.obs['sample_rep'].astype(str)
In [34]:
# Run Scanorama
scanorama.integrate_scanpy(adata_sub, dimred=dimred, knn=knn, verbose=True)

# Concatenate scanorama output 
X_scanorama = [ad.obsm['X_scanorama'] for ad in adata_sub]
X_scanorama = np.concatenate(X_scanorama)

obs_names = [ad.obs_names for ad in adata_sub]
obs_names = np.concatenate(obs_names)
all(obs_names==adata.obs_names)

# Add X_scanorama integration to adata 
adata.obsm["X_scanorama"] = X_scanorama
Found 2000 genes among all datasets
Processing datasets (1, 3)
Processing datasets (2, 3)
Processing datasets (0, 1)
Processing datasets (0, 2)
Processing datasets (0, 3)
Processing datasets (1, 2)
In [35]:
# # Dimensional reduction and clustering 
sc.pp.neighbors(adata, n_neighbors=n_neighbors, n_pcs=dimred, use_rep='X_scanorama')
sc.tl.leiden(adata, resolution=1)
sc.tl.louvain(adata, resolution=1)
sc.tl.umap(adata)

# Plot 
sc.pl.umap(adata, color=['louvain', 'leiden', 'tissue', 'treatment', 'label_fine_haemosphere', 'sample_rep', 'cc_phase_class', 'pHb_RNA', 'pRb_RNA'], wspace=0.5, ncols=3)